# Measures relationship between essay content and SAT scores by income decile
# Used to make Figure 3

library(tidyverse)

merged <- read.csv("merged_final.csv")
merged_df <- merged %>%
  filter(FAMILY_INCOME > 10000)
fi_cuts <- quantile(na.omit(merged_df$FAMILY_INCOME),
                    probs = c(.1,.2,.3,.4,.5,.6,.7,.8,.9))
merged_df$fi <- merged_df$FAMILY_INCOME

merged_df <- merged_df %>%
  mutate(percentile = case_when(fi <= fi_cuts[1] ~ 1,
                                fi > fi_cuts[1] & fi <= fi_cuts[2] ~ 2,
                                fi > fi_cuts[2] & fi <= fi_cuts[3] ~ 3,
                                fi > fi_cuts[3] & fi <= fi_cuts[4] ~ 4,
                                fi > fi_cuts[4] & fi <= fi_cuts[5] ~ 5,
                                fi > fi_cuts[5] & fi <= fi_cuts[6] ~ 6,
                                fi > fi_cuts[6] & fi <= fi_cuts[7] ~ 7,
                                fi > fi_cuts[7] & fi <= fi_cuts[8] ~ 8,
                                fi > fi_cuts[8] & fi <= fi_cuts[9] ~ 9,
                                fi > fi_cuts[9] ~ 10))

just_essays <- list()

one_sat_perc <-  merged_df %>%
  filter(percentile == 1)
one_gamma <- as.matrix(log(one_sat_perc[,6:75]))
one_gamma <- one_gamma[,-c(33)]

one_mod <- lm(RSAT_TOTAL_SCORE ~ one_gamma,
              data = one_sat_perc)
summary(one_mod)
just_essays <- append(list(summary(one_mod)$adj.r.squared, "One"),just_essays)
###

second_sat_perc <-  merged_df %>%
  filter(percentile == 2)
second_gamma <- as.matrix(log(second_sat_perc[,6:75]))
second_gamma <- second_gamma[,-c(17)]

second_mod <- lm(RSAT_TOTAL_SCORE ~ second_gamma,
                 data = second_sat_perc)

summary(second_mod)

just_essays <- append(list(summary(second_mod)$adj.r.squared, "Two"),just_essays)
###


three_sat_perc <-  merged_df %>%
  filter(percentile == 3)
three_gamma <- as.matrix(log(three_sat_perc[,6:75]))
three_gamma <- three_gamma[,-c(31)]

three_mod <- lm(RSAT_TOTAL_SCORE ~ three_gamma,
                data = three_sat_perc)

summary(three_mod)

just_essays <- append(list(summary(three_mod)$adj.r.squared, "Three"),
                      just_essays)
###


four_sat_perc <-  merged_df %>%
  filter(percentile == 4)
four_gamma <- as.matrix(log(four_sat_perc[,6:75]))
four_gamma <- four_gamma[,-c(7)]

four_mod <- lm(RSAT_TOTAL_SCORE ~ four_gamma,
               data = four_sat_perc)

summary(four_mod)

just_essays <- append(list(summary(four_mod)$adj.r.squared, "Four"),just_essays)
###

five_sat_perc <-  merged_df %>%
  filter(percentile == 5)
five_gamma <- as.matrix(log(five_sat_perc[,6:75]))
five_gamma <- five_gamma[,-c(18)]

five_mod <- lm(RSAT_TOTAL_SCORE ~ five_gamma,
               data = five_sat_perc)

summary(five_mod)

just_essays <- append(list(summary(five_mod)$adj.r.squared, "Five"),just_essays)
###


six_sat_perc <-  merged_df %>%
  filter(percentile == 6)
six_gamma <- as.matrix(log(six_sat_perc[,6:75]))
six_gamma <- six_gamma[,-c(37)]

six_mod <- lm(RSAT_TOTAL_SCORE ~six_gamma,
              data = six_sat_perc)

summary(six_mod)

just_essays <- append(list(summary(six_mod)$adj.r.squared, "Six"),just_essays)
###



sev_sat_perc <-  merged_df %>%
  filter(percentile == 7)
sev_gamma <- as.matrix(log(sev_sat_perc[,6:75]))
sev_gamma <- sev_gamma[,-c(39)]

sev_mod <- lm(RSAT_TOTAL_SCORE ~ sev_gamma,
              data = sev_sat_perc)

summary(sev_mod)

just_essays <- append(list(summary(sev_mod)$adj.r.squared, "Seven"),just_essays)
###


eight_sat_perc <-  merged_df %>%
  filter(percentile == 8)
eight_gamma <- as.matrix(log(eight_sat_perc[,6:75]))
eight_gamma <- eight_gamma[,-c(39)]

eight_mod <- lm(RSAT_TOTAL_SCORE ~eight_gamma,
                data = eight_sat_perc)

summary(eight_mod)

just_essays <- append(list(summary(eight_mod)$adj.r.squared, "Eight"),
                      just_essays)
###


ninth_sat_perc <-  merged_df %>%
  filter(percentile == 9)
ninth_gamma <- as.matrix(log(ninth_sat_perc[,6:75]))
ninth_gamma <- ninth_gamma[,-c(50)]

ninth_mod <- lm(RSAT_TOTAL_SCORE ~ninth_gamma,
                data = ninth_sat_perc)

summary(ninth_mod)

just_essays <- append(list(summary(ninth_mod)$adj.r.squared, "Nine"),just_essays)
###


ten_sat_perc <-  merged_df %>%
  filter(percentile == 10)
ten_gamma <- as.matrix(log(ten_sat_perc[,6:75]))
ten_gamma <- ten_gamma[,-c(23)]

ten_mod <- lm(RSAT_TOTAL_SCORE ~ ten_gamma,
              data = ten_sat_perc)

summary(ten_mod)

just_essays <- append(list(summary(ten_mod)$adj.r.squared, "Ten"),just_essays)

just_essays

################################################################################
# Regress essay style on SAT score by income decile

just_essays_dict <- list()

one_sat_perc <-  merged_df %>%
  filter(percentile == 1)
one_gamma <- as.matrix(merged_df[,76:167])

one_mod <- lm(RSAT_TOTAL_SCORE ~ one_gamma,
              data = one_sat_perc)
summary(one_mod)
just_essays_dict <- append(list(summary(one_mod)$adj.r.squared, "One"),just_essays_dict)
###

second_sat_perc <-  merged_df %>%
  filter(percentile == 2)
second_gamma <- as.matrix(merged_df[,76:167])

second_mod <- lm(RSAT_TOTAL_SCORE ~ second_gamma,
                 data = second_sat_perc)

summary(second_mod)

just_essays_dict <- append(list(summary(second_mod)$adj.r.squared, "Two"),just_essays_dict)
###


three_sat_perc <-  merged_df %>%
  filter(percentile == 3)
three_gamma <- as.matrix(merged_df[,76:167])

three_mod <- lm(RSAT_TOTAL_SCORE ~ three_gamma,
                data = three_sat_perc)

summary(three_mod)

just_essays_dict <- append(list(summary(three_mod)$adj.r.squared, "Three"),
                      just_essays_dict)
###


four_sat_perc <-  merged_df %>%
  filter(percentile == 4)
four_gamma <- as.matrix(merged_df[,76:167])

four_mod <- lm(RSAT_TOTAL_SCORE ~ four_gamma,
               data = four_sat_perc)

summary(four_mod)

just_essays_dict <- append(list(summary(four_mod)$adj.r.squared, "Four"),just_essays_dict)
###

five_sat_perc <-  merged_df %>%
  filter(percentile == 5)
five_gamma <- as.matrix(merged_df[,76:167])

five_mod <- lm(RSAT_TOTAL_SCORE ~ five_gamma,
               data = five_sat_perc)

summary(five_mod)

just_essays_dict <- append(list(summary(five_mod)$adj.r.squared, "Five"),just_essays_dict)
###


six_sat_perc <-  merged_df %>%
  filter(percentile == 6)
six_gamma <- as.matrix(merged_df[,76:167])

six_mod <- lm(RSAT_TOTAL_SCORE ~six_gamma,
              data = six_sat_perc)

summary(six_mod)

just_essays_dict <- append(list(summary(six_mod)$adj.r.squared, "Six"),just_essays_dict)
###



sev_sat_perc <-  merged_df %>%
  filter(percentile == 7)
sev_gamma <- as.matrix(merged_df[,76:167])

sev_mod <- lm(RSAT_TOTAL_SCORE ~ sev_gamma,
              data = sev_sat_perc)

summary(sev_mod)

just_essays_dict <- append(list(summary(sev_mod)$adj.r.squared, "Seven"),just_essays_dict)
###


eight_sat_perc <-  merged_df %>%
  filter(percentile == 8)
eight_gamma <- as.matrix(merged_df[,76:167])

eight_mod <- lm(RSAT_TOTAL_SCORE ~eight_gamma,
                data = eight_sat_perc)

summary(eight_mod)

just_essays_dict <- append(list(summary(eight_mod)$adj.r.squared, "Eight"),
                      just_essays_dict)
###


ninth_sat_perc <-  merged_df %>%
  filter(percentile == 9)
ninth_gamma <- as.matrix(merged_df[,76:167])

ninth_mod <- lm(RSAT_TOTAL_SCORE ~ninth_gamma,
                data = ninth_sat_perc)

summary(ninth_mod)

just_essays_dict <- append(list(summary(ninth_mod)$adj.r.squared, "Nine"),just_essays_dict)
###


ten_sat_perc <-  merged_df %>%
  filter(percentile == 10)
ten_gamma <- as.matrix(merged_df[,76:167])

ten_mod <- lm(RSAT_TOTAL_SCORE ~ ten_gamma,
              data = ten_sat_perc)

summary(ten_mod)

just_essays_dict <- append(list(summary(ten_mod)$adj.r.squared, "Ten"),just_essays_dict)

just_essays_dict